#Alexander F Gazmararian
#afg2@princeton.edu

#load packages
library(modelsummary)
library(tidyverse)

#load functions
source("code/fun/savefig.r")
source("code/fun/book_theme.r")
source("code/fun/coefnames4tables.r")
source("code/fun/fix_txt.r")

#load data
g <- readRDS("data/NatYouthQual.rds")
g$youth_block <- ifelse(g$youth_block == "Youth", "Youth", "Adult")

# Create Figure 7.1 ----

#create data for plot
g_long <-
  g %>%
  pivot_longer(cols = c(YouthInterest_1:YouthInterest_10)) %>%
  separate(name, into = c("question", "industry"), sep = "_") %>%
  mutate(
    industry = dplyr::case_when(
      industry == "1" ~ "Solar Energy",
      industry == "2" ~ "Wind Energy",
      industry == "3" ~ "Coal Mining",
      industry == "4" ~ "Oil and Gas",
      industry == "5" ~ "Healthcare",
      industry == "6" ~ "Trucking",
      industry == "7" ~ "Computer Programming",
      industry == "8" ~ "Energy Efficiency",
      industry == "9" ~ "Auto Manufacturing",
      industry == "10" ~ "Environmental Cleanup"
    ),
    value = ifelse(value %in% c("Very interested", "Moderately interested"), 1, 0)
  )

# Create indicator for adjustment cost
g_long$mobility_bin <- ifelse(g_long$mobility %in% c("Large adjustment", "Moderate adjustment"), 0, 1)

# Create confidence intervals
plot.data <-
  g_long %>%
  group_by(industry, youth_block) %>%
  summarise(
    n = n(),
    pct = mean(value),
    se = sd(value)/sqrt(n),
    lb90 = pct + qnorm(0.05) * se,
    ub90 = pct + qnorm(0.95) * se,
    lb95 = pct + qnorm(0.025) * se,
    ub95 = pct + qnorm(0.975) * se
  ) %>%
  group_by(industry) %>%
  mutate(pct_avg = mean(pct))

# Order from top to bottom
career_order <-
  plot.data %>%
  dplyr::select(industry,pct_avg) %>%
  unique() %>%
  dplyr::arrange(pct_avg)
career_order <- career_order$industry

# Plot aesthetics
dodgewidth <- .5

# Create plot
interest.plot <-
  plot.data %>%
  ggplot(aes(x=industry,y=pct,color=youth_block,shape=youth_block,label=scales::percent(pct, accuracy=1))) +
  geom_errorbar(aes(ymin=lb95,ymax=ub95),width=0,position=position_dodge(dodgewidth)) +
  geom_errorbar(aes(ymin=lb90,ymax=ub90),width=0,size=1.75,position=position_dodge(dodgewidth)) +
  geom_point(size=4,position=position_dodge(dodgewidth)) +
  book_theme +
  scale_y_continuous(labels = scales::percent) +
  scale_x_discrete(limits = career_order) +
  coord_flip() +
  scale_color_grey() +
  labs(y="Interested in career",x="",color="",shape="") +
  theme(
    legend.position = "bottom",
    panel.grid.major.y = element_line(color = "grey", size = .1)
  )
savefig(interest.plot, "7.1_figure_careerinterest", height = 3, filepath = "figures/")


# Expectations of adjustment cost for online appendix ----

# Create plot
mobility.desc <-
  g %>%
  group_by(mobility, youth_block) %>%
  count() %>%
  group_by(youth_block) %>%
  mutate(pct = prop.table(n)) %>%
  ggplot(aes(x=mobility,pct,fill=youth_block,label=scales::percent(pct,accuracy=1))) +
  geom_col(position="dodge") +
  geom_text(position=position_dodge(.9),vjust=-.5,size=3) +
  scale_fill_grey() +
  scale_y_continuous(labels = scales::percent,expand=c(0,0),limits=c(0,.45)) +
  labs(x="",y="",fill="") +
  book_theme +
  theme(legend.position="bottom",legend.box.margin = margin(t=-20))
mobility.desc
savefig(mobility.desc, "si_mobility", height = 2.1, filepath = "figures/ch7/")

# Regression models
m <- list()
m[[1]] <- lm(mobility_bin ~ youth_block, g)
m[[2]] <- lm(mobility_bin ~ youth_block + Female + income5 + CollegeIntend + employfull + Black + Hispanic + PartySummary, g)
m[[3]] <- lm(mobility_scale ~ youth_block, g)
m[[4]] <- lm(mobility_scale ~ youth_block + Female + income5 + CollegeIntend + employfull + Black + Hispanic + PartySummary, g)
file <- "tables/ch7/ols_mobility.txt"
modelsummary(
  m,
  stars = c("*"=.1,"**"=.05,"***"=.01),
  vcov = "HC2",
  coef_map = coefnames,
  gof_map = c("nobs", "adj.r.squared"),
  escape = FALSE,
  output = "latex"
) %>%
  cat(., file = file)
fix_txt(file)


# Within-subject comparisons for online appendix ----

#estimate models
mobility.outcomes <- c("health_vgreen", "green_v_ff", "health_v_ff")

#mobility.outcomes <- c("solar_v_health", "wind_v_health", "solar_v_coal", "solar_v_og", "wind_v_coal", "wind_v_og")
mobility.ols <- lapply(mobility.outcomes,
                       FUN = function(x) {lm(as.formula(paste0(x, "~ mobility_bin_low + youth_block + age_z + Female +",
                                                               "Black + Hispanic + PartySummary + HighestEd + Income_Incl + employfull + ruralhrsa")), g)})
#create plot
mobility.df <- modelsummary::modelplot(mobility.ols, vcov = "HC3", draw = FALSE, conf_level = .9)
mobility.df.95 <- modelsummary::modelplot(mobility.ols, vcov = "HC3", draw = FALSE, conf_level = .95)
mobility.df <- subset(mobility.df, term == "mobility_bin_low")
mobility.df.95 <- subset(mobility.df.95, term == "mobility_bin_low")
names(mobility.df.95)[c(5,6)] <- c("conf.low.95", "conf.high.95")
mobility.df$Contrast <- c("Healthcare vs. Green", "Green vs. Fossil Fuels", "Healthcare vs. Fossil Fuels")
mobility.df.95$Contrast <- c("Healthcare vs. Green", "Green vs. Fossil Fuels", "Healthcare vs. Fossil Fuels")
mobility.df <- cbind(mobility.df, mobility.df.95[, c(5,6)])
mobility.df
relative.plot <-
  mobility.df %>%
  ggplot() +
  geom_hline(yintercept=0,lty="dashed",color="grey") +
  geom_errorbar(aes(x=Contrast,ymin=conf.low.95,ymax=conf.high.95),width=0)+
  geom_errorbar(aes(x=Contrast,ymin=conf.low,ymax=conf.high),width=0,size=1) +
  geom_point(aes(x = Contrast, y = estimate), size = 4) +
  coord_flip() +
  book_theme +
  labs(
    x="",
    y="Effect of low mobility on preference for Industry 1 vs. Industry 2"
  )
relative.plot
savefig(relative.plot, "si_mobility_within", height = 1.8, filepath = "figures/ch7/")
